from data_preprocessing import load_and_preprocess_data, prepare_features
from model_training import train_and_evaluate_models
from prediction import batch_predict
import pandas as pd

def main(
    train_path: str = 'train.csv',
    test_path: str = 'test.csv',
    output_path: str = 'insurance_fraud_predictions.csv',
) -> None:
    """Run the pipeline end to end: preprocess, train, predict, and save.

    The steps are executed strictly in order, each one printing a progress
    message before it starts:

    1. ``load_and_preprocess_data`` reads the two input files.
    2. ``prepare_features`` builds the training/test features and returns
       the selected feature names and the policy ids.
    3. ``train_and_evaluate_models`` is run on the training features.
    4. ``batch_predict`` produces predictions for the test features.
    5. The predictions are written to ``output_path`` via ``to_csv``
       without the index column.

    Args:
        train_path: Path of the training data file passed to
            ``load_and_preprocess_data``.
        test_path: Path of the test data file passed to
            ``load_and_preprocess_data``.
        output_path: Destination file for the predictions.
    """
    # 1. Load and preprocess the data.
    print("正在加载和预处理数据...")
    train_data, test_data = load_and_preprocess_data(train_path, test_path)

    # 2. Prepare the features.
    print("\n正在准备特征数据...")
    X_train, y_train, X_test, selected_feature_names, policy_ids = prepare_features(
        train_data, test_data
    )

    # 3. Train and evaluate the models.
    print("\n正在训练模型...")
    # NOTE(review): the return value is not used in this function, and
    # batch_predict below is not handed a model. Presumably training persists
    # the chosen model somewhere batch_predict can find it -- TODO confirm.
    train_and_evaluate_models(X_train, y_train, selected_feature_names)

    # 4. Predict on the test data.
    print("\n正在预测测试数据...")
    predictions = batch_predict(X_test, policy_ids)

    # 5. Save the predictions (the returned object must provide to_csv).
    predictions.to_csv(output_path, index=False)
    print(f"\n预测结果已保存到 {output_path}")


# Run the pipeline only when this file is executed as a script; importing the
# module does not trigger it.
if __name__ == '__main__':
    main()